*12-06-23_AD_data_merge.do
* Purpose: start from the EUGene dyad-year output, build the dyadic covariates
* and lead outcomes below, merge in the Beck & Webb and Gleditsch files, and
* save the analysis dataset.
* Start with an empty dataset in memory.
clear

* $filetree is a global macro holding the working directory; it is not set in
* this file, so it must be defined before this do-file is run.
cd "$filetree"

**Run initial EUGene do file
* NOTE(review): presumably this loads the EUGene dyad-year data into memory;
* the commands below rely on its variables (ccode1, ccode2, year, cwmid, ...).
do "12-05-28_dyad.out.do"
*Note: the do file says that ongoing disputes are dropped, but I set EUGene to have "No Exclusions" and there are ongoing MIDs (cwongo) in the dataset, so this must be a typo.

* Move the identifiers, MID outcomes and polity scores to the front.
order ccode1 ccode2 year abbrev1 abbrev2 cwmid cwongo cwfatald polity21 polity22

* Dyadic democracy variables: the lower (dml) and higher (dmh) of the two
* polity scores. Both stay missing unless neither polity score equals ".".
generate dml = min(polity21, polity22) if polity21 != . & polity22 != .
label variable dml "DemocracyLow"
generate dmh = max(polity21, polity22) if polity21 != . & polity22 != .
label variable dmh "DemocracyHigh"

* Major-power variable: the larger of majpow1 and majpow2 in the dyad.
generate mjpw = max(majpow1, majpow2) if majpow1 != . & majpow2 != .
label variable mjpw "Major power"


* contigl: land-contiguity dummy. It is 1 when contig == 1, 0 for every other
* value of contig, and missing when contig is ".".
generate contigl = (contig == 1) if contig != .
label variable contigl "Land Contiguity"


* Natural log of distance (log() and ln() are synonyms in Stata).
generate lndist = ln(distance)

* Lower (cpl) and higher (cph) capability score within the dyad. When the two
* scores are equal both variables take cap_2. Neither is defined unless both
* cap_1 and cap_2 differ from ".".
generate cpl = cond(cap_1 < cap_2, cap_1, cap_2) if cap_1 != . & cap_2 != .
generate cph = cond(cap_1 > cap_2, cap_1, cap_2) if cap_1 != . & cap_2 != .

* Log of the ratio of the higher to the lower capability score.
generate lncprt = ln(cph / cpl) if cph != . & cpl != .
*drop cap_1 cap_2 cpl cph
label variable lncprt "Relative capability"

*calculate probability of winning using CINCs*
* Computed as the higher score's share of the dyad's combined capabilities.
generate pwin_cap = cph / (cph + cpl)

* Dyad identifier: ccode1 shifted left by three digits, plus ccode2
* (unique per ordered pair as long as ccode2 < 1000 -- TODO confirm).
generate dyadid = 1000 * ccode1 + ccode2

* Sort by dyad-year, bring identifiers, outcomes and the newly built
* covariates to the front, and write the working file.
sort ccode1 ccode2 year
order ccode1 ccode2 year abbrev1 abbrev2 cwmid cwongo cwfatald ///
      dyadid pwin_cap lncprt mjpw dml dmh contigl lndist
save "12-05-28_dem_CIE_analysis.dta", replace


*sort ccode1 ccode2 year
*sort ccode1 year
*merge m:1 ccode1 year using "Other_files/p4v2010_1.dta"
*tab polity21 polity1
**confirmed: the polity4 variables provided by EUGene are up to date

**Dependent Variables
* Every outcome below is a one-row lead within the dyad: it reads the value
* stored on the dyad's next row ([_n+1]) once rows are ordered by year.
*
* All by-group statements use "dyadid (year)". Sorting on dyadid alone leaves
* the order of years inside a dyad arbitrary (Stata's sort does not preserve
* the previous order of tied observations), which would make [_n+1] point at
* an arbitrary year instead of the next one.
*
* NOTE(review): the lead is positional. If a dyad has gaps in its year
* sequence, [_n+1] is the next available year, not year+1 -- confirm that
* this is intended.

* MID onset lead: next row's cwmid, blanked when the next row is flagged as an
* ongoing dispute (cwongo == 1).
bysort dyadid (year): gen midonsl=cwmid[_n+1]
by dyadid (year): replace midonsl=. if cwongo[_n+1]==1
label variable midonsl "MID onset lead"

* MID onset-or-ongoing lead: next row's cwongo, forced to 1 when the next row
* has cwmid == 1.
by dyadid (year): gen midongl=cwongo[_n+1]
by dyadid (year): replace midongl=1 if cwmid[_n+1]==1
label variable midongl "MID ongoing lead"

* Fatal MID onset lead: 1 when there is an onset next row with cwfatald > 0,
* 0 when there is no onset or cwfatald == 0, missing when the next row is an
* ongoing dispute.
by dyadid (year): gen fmidonsl=1 if midonsl==1 & cwfatald[_n+1]>0 & cwfatald[_n+1]~=.
by dyadid (year): replace fmidonsl=0 if midonsl==0 | cwfatald[_n+1]==0
* This replace must also run by dyad; otherwise the last row of each dyad
* would look at the first row of the following dyad.
by dyadid (year): replace fmidonsl=. if cwongo[_n+1]==1
label variable fmidonsl "Fatal MID onset lead"

* Fatal MID onset-or-ongoing lead, built the same way from midongl.
by dyadid (year): gen fmidongl=1 if midongl==1 & cwfatald[_n+1]>0 & cwfatald[_n+1]~=.
by dyadid (year): replace fmidongl=0 if midongl==0 | cwfatald[_n+1]==0
label variable fmidongl "Fatal MID ongoing lead"

* War lead: 1 when a MID is present next row with cwfatald == 6, 0 when there
* is no MID or cwfatald < 6.
by dyadid (year): gen warl=1 if midongl==1 & cwfatald[_n+1]==6
by dyadid (year): replace warl=0 if midongl==0 | cwfatald[_n+1]<6
label variable warl "War onset or ongoing lead"

* Cross-tabulations (including missing values) as a consistency check.
tab midonsl midongl, m
tab fmidonsl fmidongl, m
*AD: 449 fmidongl=0 are missing for fmidonsl (these are probably ongoing MIDs)
* (count recorded in an earlier run; re-check against the current output)
tab warl fmidongl, m

***Peace-years
* btscs is a user-written command, called here as: btscs event time unit,
* g(newvar). NOTE(review): presumably newvar counts the years since the last
* event within each unit -- confirm against the btscs help file.
* Squared and cubed terms are added for each counter.
* All three calls name the panel variable dyadid in full, so none of them
* relies on variable-name abbreviation.

btscs midongl year dyadid, g(midyears)
gen midyears2=midyears^2
gen midyears3=midyears^3

btscs fmidongl year dyadid, g(fmidyears)
gen fmidyears2=fmidyears^2
gen fmidyears3=fmidyears^3

btscs warl year dyadid, g(waryears)
gen waryears2=waryears^2
gen waryears3=waryears^3


* Attach a country name for each side of the dyad. kountry (user-written)
* leaves its result in NAMES_STD, so that variable is renamed right after
* each call to free the name for the next one.
foreach side in 1 2 {
    kountry ccode`side', from(cown)
    rename NAMES_STD country_name`side'
}

* Country names first, then overwrite the working file.
order country_name*
save "12-05-28_dem_CIE_analysis.dta", replace

* Write a slim dyad-year file with the identifiers, polity scores, lead
* outcomes, peace-year terms and cwmidnm/cwongonm, then empty memory.
keep ccode1 ccode2 year polity21 polity22 ///
     midonsl midongl fmidonsl fmidongl warl ///
     midyears midyears2 midyears3 ///
     fmidyears fmidyears2 fmidyears3 ///
     waryears waryears2 waryears3 ///
     cwmidnm cwongonm

sort ccode1 ccode2 year
save "12-06-13_mids.dta", replace
clear


use  "12-05-28_dem_CIE_analysis.dta"

**Merging:  CIEl (CIEh) (dmlCIEh) (lifepen) (gdppc)
* beck_webb1.dta is keyed on (ccode1, year1) and beck_webb2.dta on
* (ccode2, year2), so year is temporarily renamed to match each file's key
* and restored afterwards. Rows that exist only in the using file
* (_merge == 2) are discarded; all master rows are kept.

sort ccode1 ccode2 year

foreach side in 1 2 {
    rename year year`side'
    sort ccode`side' year`side'

    merge m:1 ccode`side' year`side' using "beck_webb`side'.dta"
    drop if _merge == 2
    drop _merge

    rename year`side' year
}

* Log of the lower (CIEnl) and higher (CIEnh) lifedeer value in the dyad;
* defined only when both sides' values differ from ".".
generate CIEnl = ln(min(lifedeer1, lifedeer2)) if lifedeer1 != . & lifedeer2 != .
generate CIEnh = ln(max(lifedeer1, lifedeer2)) if lifedeer1 != . & lifedeer2 != .


*set up sample
* Share of post-1950 dyad-years with a defined midongl that have no CIEnl.
count if CIEnl==. & midongl~=. & ccode1~=. & ccode2~=. & year>1950
local n_noCIE = r(N)
count if midongl~=. & ccode1~=. & ccode2~=. & year>1950
local n_sample = r(N)
display `n_noCIE'/`n_sample'
*93% of the sample involves missing values of CIEl as generated from Beck and Webb's data!

* Log of the dyad minimum for each remaining Beck & Webb measure
* (creates lifepenl, lifedensl, lifeexpl and aclil, in that order).
foreach v in lifepen lifedens lifeexp acli {
    generate `v'l = ln(min(`v'1, `v'2)) if `v'1 != . & `v'2 != .
}

*keep if ccode1~=. & ccode2~=. & year>1950 & polity21~=. & polity22~=.

*keep if ccode1~=. & ccode2~=. & year>1950 & polity21~=. & polity22~=.


**Merge original Gleditsch files
* One-to-one merge on the dyad-year key; rows present only in the using file
* are discarded, and the merge indicator is removed afterwards.
sort ccode1 ccode2 year
merge 1:1 ccode1 ccode2 year using "Other_files/gleditsch_dyad.dta"
keep if _merge != 2
drop _merge

* Lower of rgdpca and rgdpcb in the dyad, defined only when both differ from ".".
generate rgdpcl = min(rgdpca, rgdpcb) if rgdpca != . & rgdpcb != .

correlate rgdpcl CIEnl
*0.84
* Share of post-1950 dyad-years with a defined midongl that have no rgdpcl.
count if rgdpcl==. & midongl~=. & ccode1~=. & ccode2~=. & year>1950
local A1 = r(N)
count if midongl~=. & ccode1~=. & ccode2~=. & year>1950
local A2 = r(N)
display `A1'/`A2'
*2% of the sample involves missing values of rgdppc!!

* Variables retained in the analysis file, listed once in their final column
* order so that the order and keep commands cannot drift apart.
local keepvars ///
    dyadid year midongl midonsl fmidonsl fmidongl warl abbrev1 abbrev2 ///
    dml CIEnl CIEnh rgdpcl mjpw dmh contig lndist ///
    lifepenl lifedensl aclil lifeexpl ///
    country_name1 country_name2 ccode1 ccode2 ///
    cwmid cwongo cwfatald pwin_cap lncprt contigl polity21 polity22 ///
    cap_1 cap_2 ///
    milper_1 milex_1 energy_1 irst_1 upop_1 tpop_1 ///
    milper_2 milex_2 energy_2 irst_2 upop_2 tpop_2 ///
    majpow1 majpow2 rlreg12 rlreg21 distance cpl cph ///
    rgdpca rgdpcb tottra tottrb popa popb ///
    lifepen1 acli1 income1 lifedens1 lifedeer1 lifeexp1 period2 ///
    lifepen2 acli2 income2 lifedens2 lifedeer2 lifeexp2

order `keepvars'

keep `keepvars'

sort dyadid year

* Restrict to post-1950 dyad-years with a defined outcome. The dml filter is
* applied only after the tabulations below; applying it here would remove
* every row with a missing polity score and leave nothing to tabulate.
keep if midongl~=. & ccode1~=. & ccode2~=. & year>1950

* Which countries account for the missing polity scores?
tab country_name1 if polity21==., sort
tab country_name2 if polity22==., sort
*Therefore, missing values on polity correspond to mostly small countries: Maldives, Samoa, Vanuatu, Iceland, Seychelles, Brunei, etc... I now drop them.

drop if dml==.


* Save the analysis file, plus a copy in an older .dta format via saveold.
save "12-05-28_dem_CIE_analysis.dta", replace
saveold "12-05-28_dem_CIE_analysis2.dta", replace
